% Auxiliary Function: mc_weak_factors
%   Description:
%       This function performs a Monte Carlo simulation
%   Inputs:
%       params: struct of parameters, which includes
%           reps: Number of Monte Carlo repetitions
%           N, T: Dimensions of the panel data (units and time periods)
%           R0: True number of factors in the data generating process (DGP)
%           kappa: Parameters controlling the strength of factors
%           beta0: True coefficients
%           DGP_id: Select among different DGPs, where default is set to 1
%           folder_name: Name of the folder for saving results
%           R_arr: Array of number of factors
%           outputfile: File name for saving
%   Outputs:
%       out: struct contains all simulation results 

function out = mc_weak_factors(params)
    % ========================================
    % Parameter setting
    % ========================================
    % Extracts parameters from params
    reps = params.reps;
    N = params.N;
    T = params.T;
    R0 = params.R0;
    kappa = params.kappa;
    beta0 = params.beta0;
    % Determine which DGP is used for this simulation, where default is set to DGP_id = 1
    DGP_id = get_default_field(params, 'DGP_id', 1);
    % Only perform clustered standard errors in DGP 2
    clustered_se_flag = (DGP_id == 2);
    % Displays all the main input parameters for user info
    disp([10 '# Running MC-WEAK-FACTORS with the following parameters:'])
    disp(['    DGP ID   = ' num2str(DGP_id)])
    disp(['    rep   = ' num2str(reps)])
    disp(['    N     = ' num2str(N)])
    disp(['    T     = ' num2str(T)])
    disp(['    R0    = ' num2str(R0)])
    disp(['    kappa = ' num2str(kappa)])
    disp(['    beta0 = ' num2str(beta0)])
    disp(['# Starting MC runs:'])
    % Creates output folder if it does not exist
    if isfield(params,'folder_name')
        folder_name = params.folder_name;
        if ~isfolder(folder_name)
            mkdir(folder_name)
        end
    else
        folder_name = 'matlogs';
    end
    % ========================================
    % Setting up data for simulation
    % ========================================
    % Select DGP 
    dgp_fn = @(N,T) get_dgp_fn (N,T,beta0,R0,kappa,DGP_id);
    % Defines repetition range for optimization
    repMIN=3;
    repMAX=10;
    % Defines set of factor numbers to evaluate
    R_arr = get_default_field(params, 'R_arr', R0);
    N_R = length(R_arr);
    % Prepare arrays for outputs (LS estimates)
    beta_LS = zeros(reps,N_R);     
    beta_LS_BC1 = zeros(reps,N_R); 
    beta_LS_BC2 = zeros(reps,N_R); 
    beta_LS_std1 = zeros(reps,N_R); 
    beta_LS_std2 = zeros(reps,N_R); 
    beta_LS_std3 = zeros(reps,N_R); 
    % Prepare arrays for outputs (Honest weak factor estimates)
    beta_h = zeros(reps,N_R);
    beta_h_bias = zeros(reps,N_R);
    beta_h_se = zeros(reps,N_R);
    beta_h_LB = zeros(reps,N_R);
    beta_h_UB = zeros(reps,N_R);
    beta_h_LB_s = zeros(reps,N_R);
    beta_h_UB_s = zeros(reps,N_R);
    beta_h_lind = zeros(reps,N_R);
    lind_fn = @(A) max(max(A(:,:).^2))/sum(sum(A(:,:).^2));
    % ========================================
    % Main Monte Carlo Loop
    % ========================================
    parfor rep=1:reps
        ticID_iter = tic;
        rng_for_current_rep(rep)
        % Generate data
        [Y,XX] = dgp_fn(N,T);
        % Prepare arrays for outputs
        beta_LS_tmp = zeros(1,N_R);
        beta_LS_BC1_tmp = zeros(1,N_R);
        beta_LS_BC2_tmp = zeros(1,N_R);
        beta_LS_std1_tmp = zeros(1,N_R); 
        beta_LS_std2_tmp = zeros(1,N_R);
        beta_LS_std3_tmp = zeros(1,N_R);
        beta_h_tmp = zeros(1,N_R);
        beta_h_bias_tmp = zeros(1,N_R);
        beta_h_se_tmp = zeros(1,N_R);
        beta_h_LB_tmp = zeros(1,N_R);
        beta_h_UB_tmp = zeros(1,N_R);
        beta_h_LB_s_tmp = zeros(1,N_R);
        beta_h_UB_s_tmp = zeros(1,N_R);
        beta_h_lind_tmp = zeros(1,N_R);    
        % Loop over all numbers of factors
        for i_R=1:N_R 
            R = R_arr(i_R);
            % Perform LS_factor
            [beta,exitflag,lambda,f,Vbeta1,Vbeta2,Vbeta3,bcorr1,bcorr2,bcorr3] = ...
                LS_factor(Y,XX,R,'silent',10^-8,'m1',zeros(size(XX,1),1),repMIN,repMAX,2,2);
            if exitflag <= 0
                disp('QMLE OPTIMIZATION PROBLEM, may need to increase repMIN or repMAX or optimset-parameters in function qmle')  
            end 
            beta_LS_tmp(:,i_R) = beta(1);                                      
            beta_LS_BC1_tmp(:,i_R) = beta(1)-bcorr2(1)-bcorr3(1);              
            beta_LS_BC2_tmp(:,i_R) = beta(1)-bcorr1(1)-bcorr2(1)-bcorr3(1);    
            beta_LS_std1_tmp(:,i_R) = sqrt(Vbeta1(1,1));                       
            beta_LS_std2_tmp(:,i_R) = sqrt(Vbeta2(1,1));                       
            beta_LS_std3_tmp(:,i_R) = sqrt(Vbeta3(1,1));                       
            % Acquire the preliminary estimate
            Gamma_LS = lambda*f';
            % Perform honest weak factor
            [beta, bias, se, LB, UB, LB_s, UB_s, A] = honest_weak_factors(Y, XX, R, Gamma_LS, 0.05, clustered_se_flag);
            beta_h_tmp(:,i_R) = beta(1);
            beta_h_bias_tmp(:,i_R) = bias(1);
            beta_h_se_tmp(:,i_R) = se(1);
            beta_h_LB_tmp(:,i_R) = LB(1);
            beta_h_UB_tmp(:,i_R) = UB(1);
            beta_h_LB_s_tmp(:,i_R) = LB_s(1);
            beta_h_UB_s_tmp(:,i_R) = UB_s(1);    
            beta_h_lind_tmp(:,i_R) = lind_fn(A(1,:,:));
        end
        % Store the results
        beta_LS(rep,:) = beta_LS_tmp;
        beta_LS_BC1(rep,:) = beta_LS_BC1_tmp;
        beta_LS_BC2(rep,:) = beta_LS_BC2_tmp;
        beta_LS_std1(rep,:) = beta_LS_std1_tmp;
        beta_LS_std2(rep,:) = beta_LS_std2_tmp;
        beta_LS_std3(rep,:) = beta_LS_std3_tmp;
        beta_h(rep,:) = beta_h_tmp;
        beta_h_bias(rep,:) = beta_h_bias_tmp;
        beta_h_se(rep,:) = beta_h_se_tmp;
        beta_h_LB(rep,:) = beta_h_LB_tmp;
        beta_h_UB(rep,:) = beta_h_UB_tmp;
        beta_h_LB_s(rep,:) = beta_h_LB_s_tmp;
        beta_h_UB_s(rep,:) = beta_h_UB_s_tmp;
        beta_h_lind(rep,:) = beta_h_lind_tmp;  
        if  ~mod(rep,1000) || (rep == 1)
            fprintf('rep %3.0f/%1.0f, took %1.2fs\n', rep, reps, toc(ticID_iter));
        end
    end
    % ========================================
    % Prepare for outputting the results
    % ========================================
    % Calculate summary statistics for least squared estimates
    out.params = params;
    out.LS.beta = [beta_LS beta_LS_BC1 beta_LS_BC2];
    out.LS.se = cat(3,repmat(beta_LS_std1,[1 3]),repmat(beta_LS_std2,[1 3]),repmat(beta_LS_std3,[1 3]));
    out.LS.stats = gen_stats(out.LS.beta, beta0, out.LS.se);
    % Calculate summary statistics for honest weak factor
    out.H.beta = beta_h;
    out.H.wc_bias = beta_h_bias;
    out.H.se =  beta_h_se;
    out.H.LB = beta_h_LB;
    out.H.UB = beta_h_UB;
    out.H.LB_s = beta_h_LB_s;
    out.H.UB_s = beta_h_UB_s;
    out.H.lind = beta_h_lind;
    out.H.stats = gen_stats(out.H.beta, beta0, out.H.se); 
    out.H.lind_stats = gen_stats(out.H.lind);
    out.H.stats = gen_stats(out.H.beta, beta0, out.H.se);
    out.H.stats.length = mean(out.H.UB - out.H.LB);
    out.H.stats.length_s = mean(out.H.UB_s - out.H.LB_s);
    out.H.stats.size = mean( (beta0 < out.H.LB) | (beta0 > out.H.UB) );
    out.H.stats.size_s = mean( (beta0 < out.H.LB_s) | (beta0 > out.H.UB_s) );
    % ========================================
    % Save the results
    % ========================================
    if isfield(params,'outputfile')
        outputfile = params.outputfile;
    else
        outputfile = sprintf(['%s/mc_DGP%i_N%i_T%i_R0%i_kappa' repmat('%03d_',1,length(kappa)) 'reps%i'], ...
            folder_name,DGP_id,N,T,R0,round(kappa*100),reps);
    end
    disp(['# Save result to file:' outputfile])
    save([outputfile '.mat'],'out');
end


%%
% Auxiliary Function: rng_for_current_rep
%   Description:
%       Function to set and ensure that each replication (rep) gets a different, but reproducible
%       stream of randomization
%   Inputs:
%       rep: Current index of replication

function rng_for_current_rep(rep)
    rng(1091+7717*rep, 'dsfmt19937'); 
end


%%
% Auxiliary Function: get_dgp_fn
%   Description:
%       This function generates data from different Data Generating Processes (DGPs), based on a
%       selected DGP_id and other parameters
%   Inputs:
%       N: Number of units
%       T: Number of time periods
%       beta0: True parameters
%       R0: Number of latent factors
%       kappa: The strength of factors
%       DGP_id: Selecting DGP design, where 1 indicates a static design and 2 indicates a design
%           with additional covariate and heteroskedastic serially correlated errors
%   Outputs:
%       Y: Outcome variable in the N * T array
%       XX: Covariates in the K * N * T array

function [Y,XX] = get_dgp_fn (N,T,beta0,R0,kappa,DGP_id)
    if nargin < 6
        DGP_id = 1;
    end
    switch DGP_id
        % DGP design 1: simple factor model
        case 1
            sigma_u = 1;
            sigma_eps_x = 1;
            % Factors and factor loadings
            lambda0 = randn(N,R0);
            f0 = randn(T,R0);
            % Covariate
            X = lambda0*f0' + sigma_eps_x*randn(N,T);
            % Outcome
            Y = beta0 * X + bsxfun(@times,lambda0,kappa)*f0' + sigma_u*randn(N,T);
            XX = zeros(1,N,T);
            XX(1,:,:) = X;
        % DGP design 2: with additional covariate and heteroskedastic serially correlated errors
        case 2
            sigma_u = 1;
            theta_ma = 1/sqrt(2);
            nu = 5;
            % Scaled t-distributed errors
            sigma_eps = sigma_u/sqrt(nu/(nu-2)*(1+theta_ma^2));
            sigma_eps_w = 1;
            rho_xz = 1/sqrt(2);
            delta0 = 1;
            % Factors and factor loadings
            lambda0 = randn(N,R0);
            f0 = randn(T+1,R0);
            % Generate covariates with correlation between them
            eps_x = randn(N,T+1);
            eps_z = rho_xz*eps_x + sqrt(1 - rho_xz^2)*randn(N,T+1);
            X = lambda0*f0' + sigma_eps_w*eps_x;
            Z = lambda0*f0' + sigma_eps_w*eps_z;
            XX = zeros(2,N,T);
            XX(1,:,:) = X(:,2:end);
            XX(2,:,:) = Z(:,2:end);
            % Heteroskedasticity
            h_ind = (X + Z + lambda0*f0')/3;          
            eps = sigma_eps*sqrt(0.5 + 1 * exp(h_ind)./(1+exp(h_ind))).*trnd(nu,[N,T+1]);
            % Serially correlation
            U = eps(:,2:end)+theta_ma*eps(:,1:end-1);
            % Outcome
            Y = beta0 * X(:,2:end) + delta0 * Z(:,2:end) + bsxfun(@times,lambda0,kappa)*f0(2:end,:)' + U;
    end        
end
